library(Rfastp)
library(qckitfastq)
library(plotly)
library(plyr)
folder <- "../01_data"
files <- list.files(path = folder, pattern = ".gz", full.names = T)
names(files) <- gsub(pattern = "\\.fastq.gz", replacement = "", x = basename(files))
Rfastpqc_SE <- function(file, outdir, name, ad1) {
res <- rfastp(
read1 = file,
outputFastq = paste(outdir, name, sep = "/"),
thread = 4,
cutMeanQual = 20,
cutFrontMeanQual = 20,
cutTailMeanQual = 20,
cutLowQualTail = T,
cutLowQualFront = T,
qualityFiltering = T,
maxNfilter = 5,
qualityFilterPhred = 20,
qualityFilterPercent = 40,
lengthFiltering = T,
minReadLength = 15,
maxIndexMismatch = 1,
adapterTrimming = TRUE,
adapterSequenceRead1 = ad1
)
# file.remove(paste0(outdir, "/", name, ".html"))
return(res)
}
res <- parallel::mclapply(setNames(names(files), names(files)), function(x) {
qc_SE(file = files[[x]], outdir = "output/", name = x, ad1 = "TGGAATTCTCGGGTGCCAAGG")
}, mc.preschedule = F, mc.cores = 16)
json_files <- list.files(path = "output", pattern = "json", full.names = T)
names(json_files) <- gsub(pattern = "\\.json", replacement = "", x = basename(json_files))
trim_files <- list.files(path = "output", pattern = "gz", full.names = T)
names(trim_files) <- gsub(pattern = "\\_R1.fastq.gz", replacement = "", x = basename(trim_files))
#' Make general information data frame from
#'
#' @param json A JSON file with QC information
#'
#' @return A data frame with FASTQ file information
#' @export
#'
#' @examples
make_general_df <- function(json) {
js <- rjson::fromJSON(file = json)
# pkg <- as.character(packageVersion("Rfastp"))
seq <- js$read1_before_filtering$total_cycles
seq_type <- ifelse(test = any(grepl(pattern = "read2", x = names(js))), yes = "paired-end", no = "single-end")
cycle <- paste0(seq_type, " (", seq, " cycles)")
mean_len_bf <- js$summary$before_filtering$read1_mean_length
mean_len_af <- js$summary$after_filtering$read1_mean_length
dup_rate <- ifelse(test = seq_type == "paired-end",
yes = paste0(round(x = (js$duplication$rate * 100), digits = 2), "%"),
no = paste0(
round(x = (js$duplication$rate * 100), digits = 2),
"% (may be overestimated since this is SE data)"
)
)
df <- data.frame(
test = c(
# "Rfastp version",
"Sequencing",
"Mean length before filtering",
"Mean length after filtering",
"Duplication rate"
),
value = c(
# pkg,
cycle,
mean_len_bf,
mean_len_af,
dup_rate
)
)
return(df)
}
parallel::mclapply(setNames(names(json_files), names(json_files)), function(x) {
knitr::kable(make_general_df(json = json_files[[x]]), col.names = NULL)
}, mc.preschedule = F, mc.cores = 4)
$SRR13129036
| Sequencing | single-end (43 cycles) |
| Mean length before filtering | 31 |
| Mean length after filtering | 30 |
| Duplication rate | 8.93% (may be overestimated since this is SE data) |
$SRR13129037
| Sequencing | single-end (43 cycles) |
| Mean length before filtering | 30 |
| Mean length after filtering | 29 |
| Duplication rate | 5.89% (may be overestimated since this is SE data) |
$SRR13129038
| Sequencing | single-end (43 cycles) |
| Mean length before filtering | 27 |
| Mean length after filtering | 27 |
| Duplication rate | 4.62% (may be overestimated since this is SE data) |
$SRR13129039
| Sequencing | single-end (43 cycles) |
| Mean length before filtering | 28 |
| Mean length after filtering | 28 |
| Duplication rate | 5.01% (may be overestimated since this is SE data) |
$SRR13129040
| Sequencing | single-end (43 cycles) |
| Mean length before filtering | 29 |
| Mean length after filtering | 28 |
| Duplication rate | 5.1% (may be overestimated since this is SE data) |
$SRR13129041
| Sequencing | single-end (43 cycles) |
| Mean length before filtering | 31 |
| Mean length after filtering | 28 |
| Duplication rate | 8.65% (may be overestimated since this is SE data) |
$SRR13129042
| Sequencing | single-end (43 cycles) |
| Mean length before filtering | 33 |
| Mean length after filtering | 30 |
| Duplication rate | 10.17% (may be overestimated since this is SE data) |
$SRR13129043
| Sequencing | single-end (43 cycles) |
| Mean length before filtering | 33 |
| Mean length after filtering | 30 |
| Duplication rate | 11.27% (may be overestimated since this is SE data) |
make_before_filt_df <- function(json) {
js <- rjson::fromJSON(file = json)
tot_reads <- js$read1_before_filtering$total_reads
tot_bases <- js$read1_before_filtering$total_bases
q_20_bases <- js$read1_before_filtering$q20_bases
q_20_rate <- js$summary$before_filtering$q20_rate * 100
q_30_bases <- js$read1_before_filtering$q30_bases
q_30_rate <- js$summary$before_filtering$q30_rate * 100
gc <- paste0(round(x = js$summary$before_filtering$gc_content * 100, digits = 2), "%")
df <- data.frame(
test = c(
"Total reads",
"Total bases",
"Q20 bases",
"Q30 bases",
"GC content"
),
value = c(
tot_reads,
tot_bases,
paste0(q_20_bases, " (", q_20_rate, "%)"),
paste0(q_30_bases, " (", q_30_rate, "%)"),
gc
)
)
return(df)
}
parallel::mclapply(setNames(names(json_files), names(json_files)), function(x) {
knitr::kable(make_before_filt_df(json = json_files[[x]]), col.names = NULL)
}, mc.preschedule = F, mc.cores = 4)
$SRR13129036
| Total reads | 11185006 |
| Total bases | 357252971 |
| Q20 bases | 334375297 (93.5962%) |
| Q30 bases | 314060291 (87.9098%) |
| GC content | 53.47% |
$SRR13129037
| Total reads | 7867867 |
| Total bases | 240054600 |
| Q20 bases | 225451404 (93.9167%) |
| Q30 bases | 212505540 (88.5238%) |
| GC content | 52.17% |
$SRR13129038
| Total reads | 10194925 |
| Total bases | 281230897 |
| Q20 bases | 266372556 (94.7167%) |
| Q30 bases | 252189890 (89.6736%) |
| GC content | 52.86% |
$SRR13129039
| Total reads | 7830099 |
| Total bases | 224453362 |
| Q20 bases | 210404041 (93.7407%) |
| Q30 bases | 197933302 (88.1846%) |
| GC content | 53.33% |
$SRR13129040
| Total reads | 8208659 |
| Total bases | 244636315 |
| Q20 bases | 230210355 (94.1031%) |
| Q30 bases | 217184942 (88.7787%) |
| GC content | 52.93% |
$SRR13129041
| Total reads | 5001369 |
| Total bases | 156293132 |
| Q20 bases | 143287472 (91.6787%) |
| Q30 bases | 132726493 (84.9215%) |
| GC content | 50.94% |
$SRR13129042
| Total reads | 4501226 |
| Total bases | 148716550 |
| Q20 bases | 136504656 (91.7885%) |
| Q30 bases | 126439280 (85.0203%) |
| GC content | 50.99% |
$SRR13129043
| Total reads | 4384159 |
| Total bases | 146604195 |
| Q20 bases | 134363573 (91.6506%) |
| Q30 bases | 124460441 (84.8956%) |
| GC content | 50.92% |
make_after_filt_df <- function(json) {
js <- rjson::fromJSON(file = json)
tot_reads <- js$read1_after_filtering$total_reads
tot_bases <- js$read1_after_filtering$total_bases
q_20_bases <- js$read1_after_filtering$q20_bases
q_20_rate <- js$summary$after_filtering$q20_rate * 100
q_30_bases <- js$read1_after_filtering$q30_bases
q_30_rate <- js$summary$after_filtering$q30_rate * 100
gc <- paste0(round(x = js$summary$after_filtering$gc_content * 100, digits = 2), "%")
df <- data.frame(
test = c(
"Total reads",
"Total bases",
"Q20 bases",
"Q30 bases",
"GC content"
),
value = c(
tot_reads,
tot_bases,
paste0(q_20_bases, " (", q_20_rate, "%)"),
paste0(q_30_bases, " (", q_30_rate, "%)"),
gc
)
)
return(df)
}
parallel::mclapply(setNames(names(json_files), names(json_files)), function(x) {
knitr::kable(make_after_filt_df(json = json_files[[x]]), col.names = NULL)
}, mc.preschedule = F, mc.cores = 4)
$SRR13129036
| Total reads | 10827559 |
| Total bases | 329736216 |
| Q20 bases | 314144334 (95.2714%) |
| Q30 bases | 297339513 (90.175%) |
| GC content | 53.6% |
$SRR13129037
| Total reads | 7450311 |
| Total bases | 216714804 |
| Q20 bases | 207087385 (95.5576%) |
| Q30 bases | 196728119 (90.7774%) |
| GC content | 52.16% |
$SRR13129038
| Total reads | 9452835 |
| Total bases | 261140391 |
| Q20 bases | 250299658 (95.8487%) |
| Q30 bases | 238272627 (91.2431%) |
| GC content | 52.65% |
$SRR13129039
| Total reads | 7176073 |
| Total bases | 202756457 |
| Q20 bases | 193614861 (95.4913%) |
| Q30 bases | 183785082 (90.6433%) |
| GC content | 53.24% |
$SRR13129040
| Total reads | 7816079 |
| Total bases | 225710316 |
| Q20 bases | 215794836 (95.607%) |
| Q30 bases | 205089922 (90.8642%) |
| GC content | 52.91% |
$SRR13129041
| Total reads | 4737687 |
| Total bases | 136251428 |
| Q20 bases | 128073054 (93.9976%) |
| Q30 bases | 120073758 (88.1266%) |
| GC content | 50.98% |
$SRR13129042
| Total reads | 4297238 |
| Total bases | 131971658 |
| Q20 bases | 124260094 (94.1567%) |
| Q30 bases | 116608772 (88.359%) |
| GC content | 51% |
$SRR13129043
| Total reads | 4197599 |
| Total bases | 129431304 |
| Q20 bases | 121890795 (94.1741%) |
| Q30 bases | 114301250 (88.3104%) |
| GC content | 50.92% |
make_filt_df <- function(json) {
js <- rjson::fromJSON(file = json)
reads <- js$summary$before_filtering$total_reads
read_pass <- js$filtering_result$passed_filter_reads
read_pass_pct <- round((read_pass / reads) * 100, 2)
read_low_Q <- js$filtering_result$low_quality_reads
read_low_Q_pct <- round((read_low_Q / reads) * 100, 2)
read_with_N <- js$filtering_result$too_many_N_reads
read_with_N_pct <- round((read_with_N / reads) * 100, 2)
read_short <- js$filtering_result$too_short_reads
read_short_pct <- round((read_short / reads) * 100, 2)
read_long <- js$filtering_result$too_long_reads
read_long_pct <- round((read_pass / reads) * 100, 2)
df <- data.frame(
test = c(
"Reads passed filters",
"Reads with low quality",
"Reads with too many N",
"Reads too short",
"Reads too long"
),
value = c(
paste0(read_pass, " (", read_pass_pct, "%)"),
paste0(read_low_Q, " (", read_low_Q_pct, "%)"),
paste0(read_with_N, " (", read_with_N_pct, "%)"),
paste0(read_short, " (", read_short_pct, "%)"),
paste0(read_long, " (", read_long_pct, "%)")
)
)
return(df)
}
parallel::mclapply(setNames(names(json_files), names(json_files)), function(x) {
knitr::kable(make_filt_df(json = json_files[[x]]), col.names = NULL)
}, mc.preschedule = F, mc.cores = 4)
$SRR13129036
| Reads passed filters | 10827559 (96.8%) |
| Reads with low quality | 114349 (1.02%) |
| Reads with too many N | 0 (0%) |
| Reads too short | 243098 (2.17%) |
| Reads too long | 0 (96.8%) |
$SRR13129037
| Reads passed filters | 7450311 (94.69%) |
| Reads with low quality | 71235 (0.91%) |
| Reads with too many N | 0 (0%) |
| Reads too short | 346321 (4.4%) |
| Reads too long | 0 (94.69%) |
$SRR13129038
| Reads passed filters | 9452835 (92.72%) |
| Reads with low quality | 61669 (0.6%) |
| Reads with too many N | 0 (0%) |
| Reads too short | 680421 (6.67%) |
| Reads too long | 0 (92.72%) |
$SRR13129039
| Reads passed filters | 7176073 (91.65%) |
| Reads with low quality | 66473 (0.85%) |
| Reads with too many N | 0 (0%) |
| Reads too short | 587553 (7.5%) |
| Reads too long | 0 (91.65%) |
$SRR13129040
| Reads passed filters | 7816079 (95.22%) |
| Reads with low quality | 65540 (0.8%) |
| Reads with too many N | 0 (0%) |
| Reads too short | 327040 (3.98%) |
| Reads too long | 0 (95.22%) |
$SRR13129041
| Reads passed filters | 4737687 (94.73%) |
| Reads with low quality | 66542 (1.33%) |
| Reads with too many N | 0 (0%) |
| Reads too short | 197140 (3.94%) |
| Reads too long | 0 (94.73%) |
$SRR13129042
| Reads passed filters | 4297238 (95.47%) |
| Reads with low quality | 59790 (1.33%) |
| Reads with too many N | 0 (0%) |
| Reads too short | 144198 (3.2%) |
| Reads too long | 0 (95.47%) |
$SRR13129043
| Reads passed filters | 4197599 (95.74%) |
| Reads with low quality | 66768 (1.52%) |
| Reads with too many N | 0 (0%) |
| Reads too short | 119792 (2.73%) |
| Reads too long | 0 (95.74%) |
make_dup_plot <- function(json) {
js <- rjson::fromJSON(file = json)
bars <- js$duplication$histogram
gc <- js$duplication$mean_gc
xaxis <- 1:length(bars)
data <- data.frame(
Duplicate = round(bars / sum(bars) * 100, 2),
GC = round(gc * 100, 2),
x = xaxis
)
data$GC[data$GC == 0] <- ""
p <- plot_ly(data, x = ~x) %>%
add_trace(
y = ~Duplicate,
type = "bar",
name = "Read percent",
hoverinfo = "text+x",
hovertext = paste(
"<b>", data$Duplicate, "</b>"
)
) %>%
add_lines(
y = ~GC,
name = "Mean GC ratio (%)",
line = list(color = "red", width = 3),
hoverinfo = "text+x",
hovertext = paste(
"<b>", data$GC, "</b>"
)
) %>%
layout(
title = paste0("Duplication rate: ", round(js$duplication$rate * 100, 2), "%"),
xaxis = list(title = "Duplication level"),
yaxis = list(title = "Reads percent & GC ratio (%)"),
hovermode = "x closest"
)
print(htmltools::tagList(p))
}
parallel::mclapply(setNames(names(json_files), names(json_files)), function(x) {
make_dup_plot(json = json_files[[x]])
}, mc.preschedule = F, mc.cores = 4)
$SRR13129036
$SRR13129037
$SRR13129038
$SRR13129039
$SRR13129040
$SRR13129041
$SRR13129042
$SRR13129043
make_read_qual_plot <- function(json, which = "before") {
js <- rjson::fromJSON(file = json)
qual <- NULL
if (which == "before") {
qual <- js$read1_before_filtering$quality_curves
} else {
qual <- js$read1_after_filtering$quality_curves
}
data <- data.frame(
pos = 1:length(qual$mean),
Aa = qual$A,
Tt = qual$T,
Gg = qual$G,
Cc = qual$C,
Mm = qual$mean
)
data$Q20 <- 20
data$Q28 <- 8
data$Q50 <- 18
cols <- rcartocolor::carto_pal(n = 10, name = "Safe")[c(2, 1, 3, 4, 10)]
p <- plot_ly(data, x = ~pos) %>%
add_trace(
y = ~Q20, name = "Not good", fillcolor = "rgba(255,0,0, 0.2)", type = "scatter",
mode = "none", stackgroup = "one", hoverinfo = "skip"
) %>%
add_trace(
y = ~Q28, name = "OK", fillcolor = "rgba(255,255,0,0.2)", type = "scatter",
mode = "none", stackgroup = "one", hoverinfo = "skip"
) %>%
add_trace(
y = ~Q50, name = "Good", fillcolor = "rgba(0,128,0,0.2)", type = "scatter",
mode = "none", stackgroup = "one", hoverinfo = "skip"
) %>%
add_lines(
y = ~Aa,
name = "A",
line = list(color = cols[1], width = 2),
hoverinfo = "text",
hovertext = paste("<b>", data$Aa, "</b>")
) %>%
add_lines(
y = ~Tt,
name = "T",
line = list(color = cols[2], width = 2),
hoverinfo = "text",
hovertext = paste("<b>", data$Tt, "</b>")
) %>%
add_lines(
y = ~Gg,
name = "G",
line = list(color = cols[3], width = 2),
hoverinfo = "text",
hovertext = paste("<b>", data$Gg, "</b>")
) %>%
add_lines(
y = ~Cc,
name = "C",
line = list(color = cols[4], width = 2),
hoverinfo = "text",
hovertext = paste("<b>", data$Cc, "</b>")
) %>%
add_lines(
y = ~Mm,
name = "Mean",
line = list(color = "black", width = 3, dash = "dot"),
hoverinfo = "text",
hovertext = paste("<b>", data$Mm, "</b>")
) %>%
layout(
title = "Per base sequence quality",
xaxis = list(title = "Position in read (bp)"),
yaxis = list(title = "Quality score"),
hovermode = "x closest"
)
print(htmltools::tagList(p))
}
parallel::mclapply(setNames(names(json_files), names(json_files)), function(x) {
make_read_qual_plot(json = json_files[[x]], which = "before")
}, mc.preschedule = F, mc.cores = 4)
$SRR13129036
$SRR13129037
$SRR13129038
$SRR13129039
$SRR13129040
$SRR13129041
$SRR13129042
$SRR13129043
parallel::mclapply(setNames(names(json_files), names(json_files)), function(x) {
make_read_qual_plot(json = json_files[[x]], which = "after")
}, mc.preschedule = F, mc.cores = 4)
$SRR13129036
$SRR13129037
$SRR13129038
$SRR13129039
$SRR13129040
$SRR13129041
$SRR13129042
$SRR13129043
make_base_ratio_line_plot <- function(json, which = "before") {
js <- rjson::fromJSON(file = json)
qual <- NULL
if (which == "before") {
qual <- js$read1_before_filtering$content_curves
} else {
qual <- js$read1_after_filtering$content_curves
}
data <- data.frame(
pos = 1:length(qual$A),
Aa = qual$A * 100,
Tt = qual$T * 100,
Gg = qual$G * 100,
Cc = qual$C * 100,
GC = qual$GC * 100,
Nn = qual$N * 100
)
cols <- rcartocolor::carto_pal(n = 11, name = "Safe")[c(2, 1, 3, 4, 10)]
p <- plot_ly(data, x = ~pos) %>%
add_lines(
y = ~Aa,
name = "A",
line = list(color = cols[1], width = 2),
hoverinfo = "text+x",
hovertext = paste("<b>", data$Aa, "</b>")
) %>%
add_lines(
y = ~Tt,
name = "T",
line = list(color = cols[2], width = 2),
hoverinfo = "text+x",
hovertext = paste("<b>", data$Tt, "</b>")
) %>%
add_lines(
y = ~Gg,
name = "G",
line = list(color = cols[3], width = 2),
hoverinfo = "text+x",
hovertext = paste("<b>", data$Gg, "</b>")
) %>%
add_lines(
y = ~Cc,
name = "C",
line = list(color = cols[4], width = 2),
hoverinfo = "text+x",
hovertext = paste("<b>", data$Cc, "</b>")
) %>%
add_lines(
y = ~GC,
name = "GC",
line = list(color = cols[5], width = 3, dash = "dot"),
hoverinfo = "text+x",
hovertext = paste("<b>", data$GC, "</b>")
) %>%
add_lines(
y = ~Nn,
name = "N",
line = list(color = "black", width = 3),
hoverinfo = "text+x",
hovertext = paste("<b>", data$Nn, "</b>")
) %>%
layout(
title = "Per base sequence content",
xaxis = list(title = "Position in read (bp)/ Cycle"),
yaxis = list(title = "Percentage (%)"),
hovermode = "x closest"
)
print(htmltools::tagList(p))
}
parallel::mclapply(setNames(names(json_files), names(json_files)), function(x) {
make_base_ratio_line_plot(json = json_files[[x]], which = "before")
}, mc.preschedule = F, mc.cores = 4)
$SRR13129036
$SRR13129037
$SRR13129038
$SRR13129039
$SRR13129040
$SRR13129041
$SRR13129042
$SRR13129043
parallel::mclapply(setNames(names(json_files), names(json_files)), function(x) {
make_base_ratio_line_plot(json = json_files[[x]], which = "after")
}, mc.preschedule = F, mc.cores = 4)
$SRR13129036
$SRR13129037
$SRR13129038
$SRR13129039
$SRR13129040
$SRR13129041
$SRR13129042
$SRR13129043
make_base_ratio_prop_plot <- function(json, which = "before") {
js <- rjson::fromJSON(file = json)
qual <- NULL
if (which == "before") {
qual <- js$read1_before_filtering$content_curves
} else {
qual <- js$read1_after_filtering$content_curves
}
data <- data.frame(
pos = 1:length(qual$A),
Aa = qual$A * 100,
Tt = qual$T * 100,
Gg = qual$G * 100,
Cc = qual$C * 100,
GC = qual$GC * 100,
Nn = qual$N * 100
)
cols <- rcartocolor::carto_pal(n = 11, name = "Safe")[c(2, 1, 3, 4, 10)]
p <- plot_ly(data, x = ~pos) %>%
add_trace(
y = ~Nn, name = "N", fillcolor = "black", type = "scatter",
mode = "none", stackgroup = "one", hoverinfo = "skip"
) %>%
add_trace(
y = ~Cc, name = "C", fillcolor = cols[4], type = "scatter",
mode = "none", stackgroup = "one", hoverinfo = "skip"
) %>%
add_trace(
y = ~Gg, name = "G", fillcolor = cols[3], type = "scatter",
mode = "none", stackgroup = "one", hoverinfo = "skip"
) %>%
add_trace(
y = ~Tt, name = "T", fillcolor = cols[2], type = "scatter",
mode = "none", stackgroup = "one", hoverinfo = "skip"
) %>%
add_trace(
y = ~Aa, name = "A", fillcolor = cols[1], type = "scatter",
mode = "none", stackgroup = "one", hoverinfo = "skip"
) %>%
add_lines(
y = ~GC,
name = "GC",
line = list(color = cols[5], width = 3, dash = "dot"),
hoverinfo = "text",
hovertext = paste("<b>", data$GC, "</b>")
) %>%
layout(
title = "Per base sequence content",
xaxis = list(title = "Position in read (bp)/ Cycle"),
yaxis = list(title = "Percentage (%)"),
hovermode = "x closest"
)
print(htmltools::tagList(p))
}
parallel::mclapply(setNames(names(json_files), names(json_files)), function(x) {
make_base_ratio_prop_plot(json = json_files[[x]], which = "before")
}, mc.preschedule = F, mc.cores = 4)
$SRR13129036
$SRR13129037
$SRR13129038
$SRR13129039
$SRR13129040
$SRR13129041
$SRR13129042
$SRR13129043
parallel::mclapply(setNames(names(json_files), names(json_files)), function(x) {
make_base_ratio_prop_plot(json = json_files[[x]], which = "after")
}, mc.preschedule = F, mc.cores = 4)
$SRR13129036
$SRR13129037
$SRR13129038
$SRR13129039
$SRR13129040
$SRR13129041
$SRR13129042
$SRR13129043
qckitfastqrun_qc <- function(fq) {
out <- list()
infile <- fq
fseq <- seqTools::fastqq(infile)
## Read length
read_len <- read_length(fseq)
# plot_read_length(read_len)
## Per base sequence quality
bs <- per_base_quality(infile)
# plot_per_base_quality(bs)
## Per read quality
prq <- per_read_quality(infile)
# plot_per_read_quality(prq)
## GC content
gc_df <- GC_content(infile)
# plot_GC_content(gc_df)
## Nucleotide read content
rc <- read_content(fseq)
# plot_read_content(rc)
## Kmer count
km <- kmer_count(infile, k = 6)
## Overrep reads
# overrep_reads <- overrep_reads(infile)
# plot_overrep_reads(overrep_reads)
## Overrep kmer
overkm <- overrep_kmer(infile, 7)
# plot_overrep_kmer(overkm)
## Adapter content
if (.Platform$OS.type != "windows") {
ac_sorted <- adapter_content(infile)
# plot_adapter_content(ac_sorted)
} else {
ac_sorted <- "adapter_content not available for Windows; skipping"
print("adapter_content not available for Windows; skipping")
}
out <- list(
read_length = read_len,
per_base_quality = bs,
per_read_quality = prq,
GC_content = gc_df,
read_content = rc,
kmer_count = km,
overrep_kmer = overkm,
overrep_reads = overrep_reads,
adapter_content = ac_sorted
)
return(out)
}
res1 <- parallel::mclapply(setNames(names(files), names(files)), function(x) {
run_qc(fq = files[[x]])
}, mc.preschedule = F, mc.cores = 4)
res2 <- parallel::mclapply(setNames(names(trim_files), names(trim_files)), function(x) {
run_qc(fq = trim_files[[x]])
}, mc.preschedule = F, mc.cores = 4)
#' Read length distribution plot
#'
#' @param before_trim A dataframe with 2 columns: read_length and num_reads
#' @param after_trim A dataframe with 2 columns: read_length and num_reads
#'
#' @return An interactive barplot
#' @export
#'
#' @examples
read_length_plot <- function(before_trim, after_trim) {
if (!colnames(before_trim) %in% c("read_length", "num_reads") |
!colnames(after_trim) %in% c("read_length", "num_reads")) {
message("Please check the columns of input")
}
raw <- before_trim
trim <- after_trim
raw$Group <- "Raw"
trim$Group <- "Trimmed"
p1 <- plot_ly(hoverinfo = "text", type = "bar", textposition = "auto") %>%
add_trace(
data = raw,
legendgroup = ~Group,
x = ~read_length, y = ~num_reads,
name = ~Group,
# text = ~`%Reads`,
marker = list(color = "red"),
hovertext = paste(
"<b>Length:</b> ", raw$read_length,
"<br><b>nReads:</b> ", raw$num_reads
# , "<br><b>%Reads:</b> ", t$`%Reads`
)
) %>%
layout(
title = "Histogram of read length distribution",
xaxis = list(title = "Reads length"),
yaxis = list(title = "No. of reads"),
hovermode = "x closest"
)
p2 <- plot_ly(hoverinfo = "text", type = "bar", textposition = "auto") %>%
add_trace(
data = trim,
legendgroup = ~Group,
x = ~read_length, y = ~num_reads,
name = ~Group,
# text = ~`%Reads`,
marker = list(color = "green"),
hovertext = paste(
"<b>Length:</b> ", trim$read_length,
"<br><b>nReads:</b> ", trim$num_reads
# , "<br><b>%Reads:</b> ", t$`%Reads`
)
) %>%
layout(
title = "Histogram of read length distribution",
xaxis = list(title = "Reads length"),
yaxis = list(title = "No. of reads"),
hovermode = "x closest"
)
p <- subplot(p1, p2, shareX = T, shareY = T)
print(htmltools::tagList(p))
}
parallel::mclapply(setNames(names(res1), names(res1)), function(x) {
read_length_plot(before_trim = res1[[x]]$read_length, after_trim = res2[[x]]$read_length)
}, mc.preschedule = F, mc.cores = 4)
$SRR13129036
$SRR13129037
$SRR13129038
$SRR13129039
$SRR13129040
$SRR13129041
$SRR13129042
$SRR13129043
report::cite_packages(session = sessionInfo())
devtools::session_info() %>%
details::details()
─ Session info ───────────────────────────────────────────────────────────────
setting value
version R version 4.0.4 (2021-02-15)
os Ubuntu 16.04.7 LTS
system x86_64, linux-gnu
ui X11
language (EN)
collate en_US.UTF-8
ctype en_US.UTF-8
tz Europe/Zurich
date 2021-09-20
─ Packages ───────────────────────────────────────────────────────────────────
package * version date lib source
assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.0.4)
bslib 0.2.5.1 2021-05-18 [1] CRAN (R 4.0.4)
cachem 1.0.5 2021-05-15 [1] CRAN (R 4.0.4)
callr 3.7.0 2021-04-20 [1] CRAN (R 4.0.4)
cli 3.0.1 2021-07-17 [1] CRAN (R 4.0.4)
clipr 0.7.1 2020-10-08 [1] CRAN (R 4.0.4)
colorspace 2.0-2 2021-06-24 [1] CRAN (R 4.0.4)
crayon 1.4.1 2021-02-08 [1] CRAN (R 4.0.4)
crosstalk 1.1.1 2021-01-12 [1] CRAN (R 4.0.4)
data.table 1.14.0 2021-02-21 [1] CRAN (R 4.0.4)
DBI 1.1.1 2021-01-15 [1] CRAN (R 4.0.4)
desc 1.3.0 2021-03-05 [1] CRAN (R 4.0.4)
details 0.2.1 2020-01-12 [1] CRAN (R 4.0.4)
devtools 2.4.2 2021-06-07 [1] CRAN (R 4.0.4)
digest 0.6.27 2020-10-24 [1] CRAN (R 4.0.4)
dplyr 1.0.7 2021-06-18 [1] CRAN (R 4.0.4)
ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.0.4)
evaluate 0.14 2019-05-28 [1] CRAN (R 4.0.4)
fansi 0.5.0 2021-05-25 [1] CRAN (R 4.0.4)
fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.0.4)
fs 1.5.0 2020-07-31 [1] CRAN (R 4.0.4)
generics 0.1.0 2020-10-31 [1] CRAN (R 4.0.4)
ggplot2 * 3.3.5 2021-06-25 [1] CRAN (R 4.0.4)
glue 1.4.2 2020-08-27 [1] CRAN (R 4.0.4)
gtable 0.3.0 2019-03-25 [1] CRAN (R 4.0.4)
htmltools 0.5.1.1 2021-01-22 [1] CRAN (R 4.0.4)
htmlwidgets 1.5.3 2020-12-10 [1] CRAN (R 4.0.4)
httr 1.4.2 2020-07-20 [1] CRAN (R 4.0.4)
jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.0.4)
jsonlite 1.7.2 2020-12-09 [1] CRAN (R 4.0.4)
knitr 1.33 2021-04-24 [1] CRAN (R 4.0.4)
lazyeval 0.2.2 2019-03-15 [1] CRAN (R 4.0.4)
lifecycle 1.0.0 2021-02-15 [1] CRAN (R 4.0.4)
magrittr 2.0.1 2020-11-17 [1] CRAN (R 4.0.4)
memoise 2.0.0 2021-01-26 [1] CRAN (R 4.0.4)
munsell 0.5.0 2018-06-12 [1] CRAN (R 4.0.4)
pillar 1.6.2 2021-07-29 [1] CRAN (R 4.0.4)
pkgbuild 1.2.0 2020-12-15 [1] CRAN (R 4.0.4)
pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.0.4)
pkgload 1.2.1 2021-04-06 [1] CRAN (R 4.0.4)
plotly * 4.9.4.1 2021-06-18 [1] CRAN (R 4.0.4)
plyr * 1.8.6 2020-03-03 [1] CRAN (R 4.0.4)
png 0.1-7 2013-12-03 [1] CRAN (R 4.0.4)
prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.0.4)
processx 3.5.2 2021-04-30 [1] CRAN (R 4.0.4)
ps 1.6.0 2021-02-28 [1] CRAN (R 4.0.4)
purrr 0.3.4 2020-04-17 [1] CRAN (R 4.0.4)
qckitfastq * 1.6.0 2020-10-27 [1] Bioconductor
R6 2.5.0 2020-10-28 [1] CRAN (R 4.0.4)
Rcpp 1.0.7 2021-07-07 [1] CRAN (R 4.0.4)
remotes 2.4.0 2021-06-02 [1] CRAN (R 4.0.4)
reshape2 1.4.4 2020-04-09 [1] CRAN (R 4.0.4)
Rfastp * 1.0.0 2020-10-27 [1] Bioconductor
rjson 0.2.20 2018-06-08 [1] CRAN (R 4.0.4)
rlang 0.4.11 2021-04-30 [1] CRAN (R 4.0.4)
rmarkdown 2.9 2021-06-15 [1] CRAN (R 4.0.4)
rprojroot 2.0.2 2020-11-15 [1] CRAN (R 4.0.4)
RSeqAn 1.10.0 2020-10-27 [1] Bioconductor
sass 0.4.0 2021-05-12 [1] CRAN (R 4.0.4)
scales 1.1.1 2020-05-11 [1] CRAN (R 4.0.4)
sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 4.0.4)
stringi 1.5.3 2020-09-09 [1] CRAN (R 4.0.4)
stringr 1.4.0 2019-02-10 [1] CRAN (R 4.0.4)
testthat 3.0.4 2021-07-01 [1] CRAN (R 4.0.4)
tibble 3.1.3 2021-07-23 [1] CRAN (R 4.0.4)
tidyr 1.1.3 2021-03-03 [1] CRAN (R 4.0.4)
tidyselect 1.1.1 2021-04-30 [1] CRAN (R 4.0.4)
usethis 2.0.1 2021-02-10 [1] CRAN (R 4.0.4)
utf8 1.2.2 2021-07-24 [1] CRAN (R 4.0.4)
vctrs 0.3.8 2021-04-29 [1] CRAN (R 4.0.4)
viridisLite 0.4.0 2021-04-13 [1] CRAN (R 4.0.4)
withr 2.4.2 2021-04-18 [1] CRAN (R 4.0.4)
xfun 0.24 2021-06-15 [1] CRAN (R 4.0.4)
xml2 1.3.2 2020-04-23 [1] CRAN (R 4.0.4)
yaml 2.2.1 2020-02-01 [1] CRAN (R 4.0.4)
zlibbioc 1.36.0 2020-10-27 [1] Bioconductor
[1] /home/ubuntu/R/x86_64-pc-linux-gnu-library/4.0
[2] /usr/local/lib/R/site-library
[3] /usr/lib/R/site-library
[4] /usr/lib/R/library